Concepts taken from Advanced R.

Assignment operator and pipes

# At the top level `=` and `<-` both create a variable.
y = 1:10
y
##  [1]  1  2  3  4  5  6  7  8  9 10
z <- 1:10
z
##  [1]  1  2  3  4  5  6  7  8  9 10
# Inside a call, `=` only names the argument `x` of mean(); no variable `x`
# is created, so looking it up afterwards fails.
mean(x = 1:10)
## [1] 5.5
x
## Error in eval(expr, envir, enclos): object 'x' not found
# `<-` inside a call assigns `x` in the calling environment and passes the
# assigned value on as the argument.
mean(x <- 1:10)
## [1] 5.5
x
##  [1]  1  2  3  4  5  6  7  8  9 10
# Here `x = ...` is matched as an argument name, and system.time() rejects it
# as an unused argument.
system.time(x = lapply(1:10, function(x) {Sys.sleep(1); return(x)}))
## Error in system.time(x = lapply(1:10, function(x) {: unused argument (x = lapply(1:10, function(x) {
##     Sys.sleep(1)
##     return(x)
## }))
# With `<-` the assignment itself is the expression being timed; the ten
# one-second sleeps show up as roughly 10 s elapsed.
system.time(x <- lapply(1:10, function(x) {Sys.sleep(1); return(x)}))
##    user  system elapsed 
##   0.015   0.001  10.036
# Load the COVID-19 table from CSV and inspect its columns.
covid <- read.csv("../../Data/covid19.csv")

str(covid)
## 'data.frame':    18620 obs. of  8 variables:
##  $ Country.Region: chr  "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
##  $ Province.State: chr  "" "" "" "" ...
##  $ Lat           : num  33 33 33 33 33 33 33 33 33 33 ...
##  $ Long          : num  65 65 65 65 65 65 65 65 65 65 ...
##  $ Date          : chr  "2020-01-22" "2020-01-23" "2020-01-24" "2020-01-25" ...
##  $ Confirmed     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Recovered     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Deaths        : int  0 0 0 0 0 0 0 0 0 0 ...
# Base R without a pipe: date(s) on which Switzerland reached its highest
# confirmed count. The Swiss condition has to be repeated inside max().
with(
    covid,
    Date[
        Country.Region == "Switzerland" &
            Confirmed == max(Confirmed[Country.Region == "Switzerland"])
    ]
)
## [1] "2020-04-06"
# Same lookup with the native pipe: each step works on the previous result,
# so max() only sees the Swiss rows.
covid |>
    subset(Country.Region == "Switzerland") |>
    subset(Confirmed == max(Confirmed)) |>
    getElement("Date")
## [1] "2020-04-06"

Same with tidyverse.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Tidyverse version of the lookup: keep the Swiss rows, then the row(s) with
# the highest confirmed count, and extract the date.
# pull() returns the column as a plain vector, matching the base R and
# native-pipe results above; select() would return a one-column data frame.
covid %>%
    filter(Country.Region == "Switzerland") %>%
    filter(Confirmed == max(Confirmed)) %>%
    pull(Date)
## [1] "2020-04-06"

Pipe into variable

# The right-assignment operator `->` at the end of the chain stores the
# pipeline result in `max_conf_date`, so the code reads top to bottom.
covid %>%
    filter(Country.Region == "Switzerland") %>%
    filter(Confirmed == max(Confirmed)) %>%
    select("Date") -> max_conf_date
# select() keeps the data frame structure, hence the one-column table below.
max_conf_date
##         Date
## 1 2020-04-06
# Line chart of confirmed cases in Switzerland over time.
covid %>%
    filter(Country.Region == "Switzerland") %>%
    ggplot() +
    # Date is a character column, so the x axis is discrete; group = 1 makes
    # geom_line() connect all points as a single series.
    geom_line(
        mapping = aes(
            x = Date,
            y = Confirmed,
            color = Country.Region,
            group = 1
        )
    ) +
    theme_classic(base_size = 15) +
    # Rotate the date labels so they do not overlap.
    theme(axis.text.x = element_text(angle = 90))

Efficient coding

Reading in data

Write large data frame

# Generate 1e7 standard-normal values arranged in 10 columns (1e6 rows) and
# write them to disk as the input file for the reader benchmark.
matrix(rnorm(n = 1e7), ncol = 10) |>
    as.data.frame() |>
    write_csv(file = "../../Data/large_test_data.csv")

read.csv vs read_csv vs data.table::fread vs vroom

library(bench)
library(data.table)
library(vroom)
library(DT)

# Time four CSV readers on the same file and show the results as an
# interactive table.
# check = FALSE turns off bench::mark()'s comparison of the results across
# expressions (presumably needed because the readers return different
# classes - TODO confirm).
# NOTE(review): the DT warning below suggests the full benchmark object is
# too large for client-side rendering; consider keeping only the summary
# columns before calling DT::datatable() - TODO confirm which columns to drop.
bench::mark(
    cur_data <- read.csv("../../Data/large_test_data.csv"),
    cur_data <- readr::read_csv("../../Data/large_test_data.csv"),
    cur_data <- data.table::fread("../../Data/large_test_data.csv"),
    cur_data <- vroom::vroom("../../Data/large_test_data.csv"),
    check = FALSE
) %>% DT::datatable()
## Warning: Some expressions had a GC in every iteration; so filtering is disabled.
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

Loops and apply

# A named list holding two integer vectors.
x <- list(
    entr1 = 1:10,
    entr2 = 20:30
)

# Operators are ordinary functions: pass `[[` to lapply() to extract the
# third element of every entry.
lapply(X = x, FUN = `[[`, 3)
## $entr1
## [1] 3
## 
## $entr2
## [1] 22
# The replacement form `[[<-` returns a modified copy of each entry with the
# third element set to 120; `x` itself is not changed.
lapply(X = x, FUN = `[[<-`, 3, 120)
## $entr1
##  [1]   1   2 120   4   5   6   7   8   9  10
## 
## $entr2
##  [1]  20  21 120  23  24  25  26  27  28  29  30

Parallelisation

Object-oriented coding

Mainly relevant for package development rather than data analysis, but good to understand.

S3 vs S4 vs R6

This is a very opinionated topic; my preference, from developing for Bioconductor, is to use S4.

Common pitfalls

Vector recycling

the drop argument

Recommendations on coding style

Avoid copying code and modularize (the source command)

# Record the R version, platform and package versions used for this document.
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] DT_0.23           vroom_1.5.7       data.table_1.14.2 bench_1.1.2      
##  [5] forcats_0.5.1     stringr_1.4.0     dplyr_1.0.9       purrr_0.3.4      
##  [9] readr_2.1.2       tidyr_1.2.0       tibble_3.1.7      ggplot2_3.3.6    
## [13] tidyverse_1.3.1  
## 
## loaded via a namespace (and not attached):
##  [1] lubridate_1.8.0   assertthat_0.2.1  digest_0.6.29     utf8_1.2.2       
##  [5] R6_2.5.1          cellranger_1.1.0  backports_1.4.1   reprex_2.0.1     
##  [9] evaluate_0.15     httr_1.4.3        highr_0.9         pillar_1.7.0     
## [13] rlang_1.0.2       readxl_1.4.0      rstudioapi_0.13   jquerylib_0.1.4  
## [17] rmarkdown_2.14    labeling_0.4.2    htmlwidgets_1.5.4 bit_4.0.4        
## [21] munsell_0.5.0     broom_0.8.0       compiler_4.2.0    modelr_0.1.8     
## [25] xfun_0.31         pkgconfig_2.0.3   htmltools_0.5.2   tidyselect_1.1.2 
## [29] fansi_1.0.3       crayon_1.5.1      tzdb_0.3.0        dbplyr_2.2.0     
## [33] withr_2.5.0       grid_4.2.0        jsonlite_1.8.0    gtable_0.3.0     
## [37] lifecycle_1.0.1   DBI_1.1.2         magrittr_2.0.3    scales_1.2.0     
## [41] profmem_0.6.0     cli_3.3.0         stringi_1.7.6     farver_2.1.0     
## [45] fs_1.5.2          xml2_1.3.3        bslib_0.3.1       ellipsis_0.3.2   
## [49] generics_0.1.2    vctrs_0.4.1       tools_4.2.0       bit64_4.0.5      
## [53] glue_1.6.2        crosstalk_1.2.0   hms_1.1.1         parallel_4.2.0   
## [57] fastmap_1.1.0     yaml_2.3.5        colorspace_2.0-3  rvest_1.0.2      
## [61] knitr_1.39        haven_2.5.0       sass_0.4.1